# -*- coding:utf-8 -*-

# @Time    : 2023/10/27 16:32
# @Author  : zengwenjia
# @Email   : zengwenjia@lingxi.ai
# @File    : label.py
# @Software: LLM_internal

# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
import pandas as pd
import json

# Convert training data into the labeling format.
def convert_data_to_label(data_path, label_path):
    """
    Convert conversation-style training data into a CSV used for labeling.

    The input JSON is loaded into a DataFrame and its "conversations" column
    is processed one conversation at a time. Each kept conversation becomes
    one CSV row whose cells are the per-turn dicts produced by
    ``_convert_conversation`` (written via their ``str()`` form); shorter
    conversations are padded with empty cells. Conversations that yield no
    records are omitted.

    :param data_path: path of the training-data JSON file (read as UTF-8)
    :param label_path: path of the CSV file to write (no header, no index)
    :return: None
    """
    # Context manager so the input handle is closed even if parsing fails.
    with open(data_path, 'r', encoding='utf-8') as source:
        data = json.load(source)

    frame = pd.DataFrame(data)
    # An empty frame has no "conversations" column to select; a non-empty
    # frame without that column still raises KeyError, as before.
    conversation_column = [] if frame.empty else frame["conversations"]

    result_list = []
    for conversations in conversation_column:
        records = _convert_conversation(conversations)
        if records:
            result_list.append(records)

    # Debug preview; guarded so an all-filtered input no longer raises
    # IndexError.
    if result_list:
        print(result_list[0])

    # One row per conversation, one column per turn.
    result = pd.DataFrame(result_list)
    print(result.head(2))
    result.to_csv(label_path, index=False, header=False)


def _convert_conversation(conversations):
    """Turn one conversation (a list of turn dicts) into a list of records.

    Returns an empty list when the first turn is exactly the greeting
    "你好。", which drops the whole conversation. When the first turn contains
    the JSON-output marker, every "gpt" answer in the conversation is reduced
    by ``_extract_app_label``. Turns whose "from" is neither "human" nor
    "gpt" produce an empty record, so turn positions stay aligned.
    """
    records = []
    expects_json = False
    for position, turn in enumerate(conversations):
        role = turn["from"]
        record = {}
        if role == "human":
            text = turn["value"]
            record["human"] = text
        elif role == "gpt":
            text = turn["value"]
            record["gpt"] = _extract_app_label(text) if expects_json else text
        else:
            # Always bind the current turn's own text so the first-turn
            # checks below never see an undefined or stale value.
            text = turn.get("value")

        if position == 0:
            if str(text) == "你好。":
                return []
            if isinstance(text, str) and "输出格式为json格式：key: value格式" in text:
                expects_json = True

        records.append(record)
    return records


def _extract_app_label(raw_value):
    """Parse a JSON answer and keep only its "有无APP" entry.

    Returns ``{}`` when the answer is not valid JSON, is not a JSON object,
    lacks the key, or the kept value is "未知".
    """
    try:
        parsed = json.loads(raw_value)
    except (ValueError, TypeError) as error:
        # Best effort: report the unparsable answer and fall back to empty.
        print(error)
        return {}
    if not isinstance(parsed, dict) or "有无APP" not in parsed:
        return {}
    label = {"有无APP": parsed["有无APP"]}
    if "未知" in label.values():
        return {}
    return label

if __name__ == "__main__":
    import sys

    # Usage: python <this script> [DATA_JSON] [LABEL_CSV]
    # Optional positional arguments override the paths; with no arguments
    # the original hard-coded locations are used.
    default_data_path = "/Users/zengwenjia/Downloads/financial_data_1027.json"
    default_label_path = "/Users/zengwenjia/Downloads/financial_data_1027_new.csv"

    cli_args = sys.argv[1:]
    chosen_data_path = cli_args[0] if len(cli_args) > 0 else default_data_path
    chosen_label_path = cli_args[1] if len(cli_args) > 1 else default_label_path
    convert_data_to_label(chosen_data_path, chosen_label_path)